Close

% Conference paper: SIBGRAPI 2020 (33rd edition), published by IEEE Computer Society.
% Every field name occurs exactly once: `url` holds the DOI resolver link and
% the repository (IBI) link is kept in `ibi-url`.
% Non-standard fields (affiliation, conference-*, ibi, ibi-url, targetfile,
% urlaccessdate) are retained as metadata; unknown fields are ignored by styles.
@InProceedings{SantosAlme:2020:FaAcCo,
               author = "Santos, Samuel Felipe dos and Almeida, Jurandy",
          affiliation = "{Universidade Federal de S{\~a}o Paulo - UNIFESP} and 
                         {Universidade Federal de S{\~a}o Paulo - UNIFESP}",
                title = "Faster and Accurate Compressed Video Action Recognition Straight 
                         from the Frequency Domain",
            booktitle = "Proceedings of the 33rd Conference on Graphics, Patterns and 
                         Images ({SIBGRAPI})",
                 year = "2020",
               editor = "Musse, Soraia Raupp and Cesar Junior, Roberto Marcondes and 
                         Pelechano, Nuria and Wang, Zhangyang (Atlas)",
            publisher = "IEEE Computer Society",
              address = "Los Alamitos",
             keywords = "action recognition, convolutional neural network, 
                         compressed-domain processing, frequency domain",
             abstract = "Human action recognition has become one of the most active field 
                         of research in computer vision due to its wide range of 
                         applications, like surveillance, medical, industrial environments, 
                         smart homes, among others. Recently, deep learning has been 
                         successfully used to learn powerful and interpretable features for 
                         recognizing human actions in videos. Most of the existing deep 
                         learning approaches have been designed for processing video 
                         information as RGB image sequences. For this reason, a preliminary 
                         decoding process is required, since video data are often stored in 
                         a compressed format. However, a high computational load and memory 
                         usage is demanded for decoding a video. To overcome this problem, 
                         we propose a deep neural network capable of learning straight from 
                         compressed video. Our approach was evaluated on two public 
                         benchmarks, the UCF-101 and HMDB-51 datasets, demonstrating 
                         comparable recognition performance to the state-of-the-art 
                         methods, with the advantage of running up to 2 times faster in 
                         terms of inference speed.",
      conference-name = "Conference on Graphics, Patterns and Images, 33. (SIBGRAPI)",
  conference-location = "Porto de Galinhas (virtual)",
      conference-year = "7-10 Nov. 2020",
                  doi = "10.1109/SIBGRAPI51738.2020.00017",
                  url = "http://dx.doi.org/10.1109/SIBGRAPI51738.2020.00017",
             language = "en",
                  ibi = "8JMKD3MGPEW34M/43BDCD8",
              ibi-url = "http://urlib.net/ibi/8JMKD3MGPEW34M/43BDCD8",
           targetfile = "PID6630911.pdf",
        urlaccessdate = "2024, Apr. 27"
}


Close